In SAS 9.2, when a STRATA statement is present and no COVARIATES= data set is specified, PROC PHREG incorrectly uses the overall covariate means, rather than the stratum-specific covariate means, as the default covariate values in the BASELINE OUT= data set.
For SAS 9.2, a work-around is to use a COVARIATES= data set that contains all the stratum-specific covariate means and post-process the OUT= data set to select the appropriate estimates. You can then plot the selected estimates with PROC SGPLOT. See the test program on the Full Code tab above.
Product Family | Product | System | SAS Release Reported | SAS Release Fixed* |
SAS System | SAS/STAT | z/OS | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | OpenVMS on HP Integrity | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Solaris for x64 | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Linux for x64 | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Linux | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | 64-bit Enabled Solaris | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | HP-UX IPF | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | 64-bit Enabled HP-UX | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | 64-bit Enabled AIX | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Windows Vista for x64 | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Windows Vista | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows XP Professional | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows Server 2003 Standard Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows Server 2003 Enterprise Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft® Windows® for x64 | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows Server 2003 Datacenter Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows XP 64-bit Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows Server 2003 Enterprise 64-bit Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft Windows Server 2003 Datacenter 64-bit Edition | 9.2 TS1M0 | 9.3 TS1M0 |
SAS System | SAS/STAT | Microsoft® Windows® for 64-Bit Itanium-based Systems | 9.2 TS1M0 | 9.3 TS1M0 |
/* Session setup: fix the page layout, reset ODS selection/tracing, make sure */
/* the LISTING destination is open, and clear any title/footnote left over   */
/* from earlier work in the session.                                          */
options linesize=80 pagesize=56
        nodate number pageno=1 center;

ods select all;
ods trace off;
ods listing;

title1;
footnote1;
run;
/* Build the VALung analysis data set from the in-stream records below.       */
/* The records are of two kinds: header lines ("<therapy> <cell>") and lines   */
/* holding several 5-value patient tuples                                      */
/* (SurvTime Kps DiagTime Age Prior). A negative SurvTime marks a censored     */
/* time; the sign is stripped after the censoring flag is derived.             */
data VALung;
/* Check and M are parsing helpers only; they are not written to VALung. */
drop check m;
/* Header values are read once and carried forward onto the patient rows. */
retain Therapy Cell;
/* COLUMN= exposes the current input pointer position in variable Column. */
infile datalines column=column;
length Check $ 1;
/* Remember where the pointer is before peeking at the next token. */
M=Column;
/* Peek at the first character of the next token and hold the line. */
input Check $ @@;
/* If the pointer is now left of where it started, the read appears to have */
/* moved on to a new record, so the re-read below should start at column 1. */
if M>Column then M=1;
/* 's' (standard) or 't' (test) starts a header line; anything else is data. */
if Check='s'|Check='t' then input @M Therapy $ Cell $ ;
else input @M SurvTime Kps DiagTime Age Prior @@;
/* Subsetting IF: header iterations leave SurvTime missing and are dropped. */
if SurvTime > .;
/* censor=1 when the recorded time is negative (censored), else 0. */
censor=(SurvTime<0);
SurvTime=abs(SurvTime);
/* Treatment=1 for the 'test' therapy, 0 otherwise. */
Treatment=(Therapy='test');
cards;
standard squamous
72 60 7 69 0 411 70 5 64 10 228 60 3 38 0 126 60 9 63 10
118 70 11 65 10 10 20 5 49 0 82 40 10 69 10 110 80 29 68 0
314 50 18 43 0 -100 70 6 70 0 42 60 4 81 0 8 40 58 63 10
144 30 4 63 0 -25 80 9 52 10 11 70 11 48 10
standard small
30 60 3 61 0 384 60 9 42 0 4 40 2 35 0 54 80 4 63 10
13 60 4 56 0 -123 40 3 55 0 -97 60 5 67 0 153 60 14 63 10
59 30 2 65 0 117 80 3 46 0 16 30 4 53 10 151 50 12 69 0
22 60 4 68 0 56 80 12 43 10 21 40 2 55 10 18 20 15 42 0
139 80 2 64 0 20 30 5 65 0 31 75 3 65 0 52 70 2 55 0
287 60 25 66 10 18 30 4 60 0 51 60 1 67 0 122 80 28 53 0
27 60 8 62 0 54 70 1 67 0 7 50 7 72 0 63 50 11 48 0
392 40 4 68 0 10 40 23 67 10
standard adeno
8 20 19 61 10 92 70 10 60 0 35 40 6 62 0 117 80 2 38 0
132 80 5 50 0 12 50 4 63 10 162 80 5 64 0 3 30 3 43 0
95 80 4 34 0
standard large
177 50 16 66 10 162 80 5 62 0 216 50 15 52 0 553 70 2 47 0
278 60 12 63 0 12 40 12 68 10 260 80 5 45 0 200 80 12 41 10
156 70 2 66 0 -182 90 2 62 0 143 90 8 60 0 105 80 11 66 0
103 80 5 38 0 250 70 8 53 10 100 60 13 37 10
test squamous
999 90 12 54 10 112 80 6 60 0 -87 80 3 48 0 -231 50 8 52 10
242 50 1 70 0 991 70 7 50 10 111 70 3 62 0 1 20 21 65 10
587 60 3 58 0 389 90 2 62 0 33 30 6 64 0 25 20 36 63 0
357 70 13 58 0 467 90 2 64 0 201 80 28 52 10 1 50 7 35 0
30 70 11 63 0 44 60 13 70 10 283 90 2 51 0 15 50 13 40 10
test small
25 30 2 69 0 -103 70 22 36 10 21 20 4 71 0 13 30 2 62 0
87 60 2 60 0 2 40 36 44 10 20 30 9 54 10 7 20 11 66 0
24 60 8 49 0 99 70 3 72 0 8 80 2 68 0 99 85 4 62 0
61 70 2 71 0 25 70 2 70 0 95 70 1 61 0 80 50 17 71 0
51 30 87 59 10 29 40 8 67 0
test adeno
24 40 2 60 0 18 40 5 69 10 -83 99 3 57 0 31 80 3 39 0
51 60 5 62 0 90 60 22 50 10 52 60 3 43 0 73 60 3 70 0
8 50 5 66 0 36 70 8 61 0 48 10 4 81 0 7 40 4 58 0
140 70 3 63 0 186 90 3 60 0 84 80 4 62 10 19 50 10 42 0
45 40 3 69 0 80 40 4 63 0
test large
52 60 4 45 0 164 70 15 68 10 19 30 4 39 10 53 60 12 66 0
15 30 5 63 0 43 60 11 49 10 340 80 10 64 10 133 75 1 65 0
111 60 5 64 0 231 70 18 67 10 378 80 4 65 0 49 30 3 37 0
;
/*----------------------------------------------------------------*/
/* DATA: order the analysis data by cell type and survival time,  */
/*       then list it with one page per cell type                 */
/*----------------------------------------------------------------*/
proc sort data=VALung;
   by cell SurvTime;
run;

title1 "VA Lung Cancer Data";
proc print data=VALung;
   var SurvTime Censor Age Prior DiagTime Kps;
   by cell;
   pageby cell;
   id cell;
run;
/*------------------------------------------------------------------*/
/* PHREG - problem                                                  */
/* In SAS 9.2 a stratified analysis uses the overall covariate      */
/* means instead of the stratum-specific covariate means for the    */
/* default BASELINE data set.                                       */
/*------------------------------------------------------------------*/

/*------------------------------------------------------------------*/
/* MEANS: overall covariate means, i.e. averaged over every level   */
/*        of the strata variable (cell). Stored for comparison with */
/*        the covariate values PHREG writes to the BASELINE output. */
/*------------------------------------------------------------------*/
title1 "overall covariate means";
title2 "averaging over all strata levels";
title3 "'cell' is strata variable here";

proc means data=VALung mean noprint;
   var Age Prior DiagTime Kps;
   output out=CovarMeans_Overall(keep=Age Prior DiagTime Kps)
          mean=Age Prior DiagTime Kps;
run;

proc print noobs data=CovarMeans_Overall;
   format Age Prior DiagTime Kps 9.6;
run;
/*------------------------------------------------------------------*/
/* PHREG: stratified Cox model with the default BASELINE request.   */
/*        No COVARIATES= data set is given, so PHREG chooses the    */
/*        covariate values itself; in SAS 9.2 these are the overall */
/*        means (incorrect), not the stratum-specific means.        */
/*------------------------------------------------------------------*/
ods graphics on;
ods select CensoredSummary ParameterEstimates ReferenceSet SurvivalPlot;

title1 "overall covariate means (not stratum specific)";
proc phreg data=VALung plot(overlay=row)=survival;
   strata cell;
   model SurvTime*Censor(1) = Age Prior DiagTime Kps;
   /* default - overall covariate means (incorrect) */
   baseline out=out1 survival=survival;
run;

ods graphics off;
run;

/* List the default BASELINE output, one page per cell type, to show that */
/* every stratum carries the same covariate values.                       */
proc sort data=out1;
   by cell Age Prior DiagTime Kps;
run;

title1 "default BASELINE data set";
title2 "has overall covariate means (same for each strata)";
title3 "not stratum specific covariate means";
title4 "when STRATA statement is present";
proc print data=out1;
   id cell Age Prior DiagTime Kps;
   by cell Age Prior DiagTime Kps;
   pageby cell;
run;
/*---------------------------------------------------------------------------*/
/* PHREG - workaround                                                        */
/* Specify the stratum-specific covariate means explicitly. The BASELINE     */
/* OUT= data set must still be post-processed to keep only the stratum-      */
/* specific covariate means and their associated survival functions.         */
/*                                                                           */
/* PHREG's ODS Graphics does not work well in this situation because PHREG   */
/* produces survival curves for each stratum at each covariate pattern.      */
/* In the VALUNG data there are four strata, each with its own stratum-      */
/* specific covariate means. PHREG uses each of these four sets of covariate */
/* means for each stratum and produces 16 survival functions. This is        */
/* clunky and not very useful and will be fixed in a future release.         */
/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/
/* MEANS: stratum-specific covariate means, one row per level of cell.       */
/*        VALung must already be in cell order for the BY statement.         */
/*---------------------------------------------------------------------------*/
title1 "stratum specific covariate means";
title2 "means for each strata level individually";
title3 "'cell' is strata variable here";

proc means data=VALung mean noprint;
   by cell;
   var Age Prior DiagTime Kps;
   output out=CovarMeans_BY_Strata(keep=cell Age Prior DiagTime Kps)
          mean=Age Prior DiagTime Kps;
run;

proc print data=CovarMeans_BY_Strata;
   id cell;
   by cell;
run;
/*------------------------------------------------------------------*/
/* PHREG: same stratified model, but the BASELINE statement is fed  */
/*        the stratum-specific means through COVARIATES=. ODS       */
/*        Graphics stays off here; the curves are plotted later     */
/*        from the post-processed OUT= data set.                    */
/*------------------------------------------------------------------*/
ods graphics off;
ods select CensoredSummary ParameterEstimates;

title1 "stratum specific covariates means";
proc phreg data=VALung;
   strata cell;
   model SurvTime*Censor(1) = Age Prior DiagTime Kps;
   baseline out=out2
            covariates=CovarMeans_By_Strata
            survival=survival;
run;

ods graphics off;
run;
/*
proc sort data=out2;
by cell age prior diagtime Kps;
run;
proc print data=out2;
by cell age prior diagtime Kps;
id cell age prior diagtime kps;
pageby age;
title1 "default BASELINE data set using stratum specific covariate means as COVARIATES= option";
title2 "unfortunately this data set contains the stratum specific covariate means but";
title3 "crosses these with each strata giving rise to 16 different survival functions";
title4 "below we post-process the BASELINE data set to keep only the strata specific covariate means";
title5 "which gives the four desired survival functions";
run;
*/
/*---------------------------------------------------------------------------*/
/* Post-process the BASELINE OUT= data set (out2): within each stratum keep  */
/* only the rows whose covariate pattern equals that stratum's own covariate */
/* means.                                                                    */
/*                                                                           */
/* The means are taken from CovarMeans_BY_Strata instead of being typed in   */
/* as literals, so the subset stays correct if the input data change.        */
/* Both sides of each comparison are rounded to the same unit so that tiny   */
/* floating-point differences do not prevent a match. As before, the         */
/* covariates kept in out2_subset are the rounded values (Prior to 5         */
/* decimals, the others to 4).                                               */
/*---------------------------------------------------------------------------*/

/* MERGE needs both inputs in cell order. CovarMeans_BY_Strata already is    */
/* (it comes from a BY-cell PROC MEANS); out2 is sorted into the target data */
/* set so that out2 itself is left untouched.                                */
proc sort data=out2 out=out2_subset;
   by cell;
run;

data out2_subset;
   /* One row of means per cell is spread across all baseline rows of that   */
   /* cell; the means are renamed so they do not overwrite out2's covariates */
   merge out2_subset(in=inBaseline)
         CovarMeans_BY_Strata(in=inMeans
                              keep=cell Age Prior DiagTime Kps
                              rename=(Age=_mAge Prior=_mPrior
                                      DiagTime=_mDiagTime Kps=_mKps));
   by cell;
   drop _mAge _mPrior _mDiagTime _mKps;

   /* only baseline rows that have stratum means to compare against */
   if inBaseline and inMeans;

   Age      = round(Age,      .0001);
   Prior    = round(Prior,    .00001);
   DiagTime = round(DiagTime, .0001);
   Kps      = round(Kps,      .0001);

   /* keep the row only when it is this stratum's own covariate pattern */
   if Age      = round(_mAge,      .0001)
    & Prior    = round(_mPrior,    .00001)
    & DiagTime = round(_mDiagTime, .0001)
    & Kps      = round(_mKps,      .0001);
run;
/* List the post-processed BASELINE output: one covariate pattern per  */
/* stratum, one page per cell type.                                    */
proc sort data=out2_subset;
   by cell Age Prior DiagTime Kps;
run;

title1 "BASELINE data set after post-processing";
title2 "keeping only stratum specific covariate means";
proc print data=out2_subset;
   id cell Age Prior DiagTime Kps;
   by cell Age Prior DiagTime Kps;
   pageby cell;
run;
/* Plot the selected survival estimates as step functions, one curve per */
/* cell type, with titles cleared and an explanatory footnote.           */
title1;
footnote1 h=1 f=duplex 'survival curves represent covariate means in each stratum';

ods graphics on;
proc sgplot data=out2_subset;
   step y=Survival x=SurvTime /
        name="survival"
        legendlabel="Survival"
        group=cell;
run;
ods graphics off;
quit;
Type: | Problem Note |
Priority: | alert |
Topic: | Analytics ==> Survival Analysis; SAS Reference ==> Procedures ==> PHREG |
Date Modified: | 2011-07-22 14:28:31 |
Date Created: | 2011-07-20 16:53:12 |